# Total number of babies named "Allison" in each state, split by the sex
# recorded at birth, reshaped so each state is one row with one column per sex.
Allison <- StateNames_A |>
  filter(Name == "Allison") |>
  # Relabel the sex codes so the pivoted columns get descriptive names
  # (presumably Gender only holds "F" / "M" -- confirm against the data).
  mutate(
    Gender = str_replace_all(Gender, c("F" = "F_at_Birth", "M" = "M_at_Birth"))
  ) |>
  group_by(State, Gender) |>
  # Drop the grouping explicitly: otherwise the result stays grouped by State
  # and summarize() prints a regrouping message.
  summarize(Count = sum(Count), .groups = "drop") |>
  # A state with no records for one sex gets 0 rather than NA.
  pivot_wider(names_from = Gender, values_from = Count, values_fill = 0)

# col.names are applied positionally to the columns of Allison.
knitr::kable(
  Allison,
  format = "html",
  col.names = c("State", "Female at Birth", "Male at Birth"),
  align = "c",
  caption = "Number of Babies Named \'Allison\' from 1997 to 2014"
)
Number of Babies Named 'Allison' from 1997 to 2014
State
Female at Birth
Male at Birth
AK
232
0
AL
1535
0
AR
1198
0
AZ
1880
0
CA
12413
0
CO
1594
0
CT
1099
0
DC
321
0
DE
294
0
FL
4455
0
GA
3257
0
HI
183
0
IA
1477
0
ID
451
0
IL
5110
0
IN
3067
0
KS
1283
0
KY
1905
20
LA
1209
0
MA
2218
0
MD
2229
0
ME
340
0
MI
4014
0
MN
2374
0
MO
2882
0
MS
817
0
MT
226
0
NC
3435
0
ND
285
0
NE
807
0
NH
412
0
NJ
3052
0
NM
399
0
NV
729
0
NY
5747
0
OH
5487
0
OK
1421
0
OR
1186
0
PA
4307
0
RI
306
0
SC
1228
0
SD
376
0
TN
2488
0
TX
10192
0
UT
1125
0
VA
3220
0
VT
135
0
WA
1956
0
WI
2367
0
WV
813
0
WY
142
0
Code
## Interactive version of the same table, rendered with DT.
## `filter = "top"` places a per-column filter box above the header row.
DT::datatable(
  Allison,
  class = "cell-border stripe",
  colnames = c("State", "Female at Birth", "Male at Birth"),
  caption = "Number of Babies Named \'Allison\' from 1997 to 2014",
  filter = "top"
)
Question 2.
Code
# Narrow the table to the female-at-birth counts only.
# Note: this overwrites Allison, so the male-at-birth column is gone from
# here on.
Allison <- select(Allison, State, F_at_Birth)

# Static HTML table; col.names are applied positionally.
knitr::kable(
  Allison,
  format = "html",
  col.names = c("State", "Frequency of Babies"),
  align = "c",
  caption = "Number of Female-at-birth Babies Named \'Allison\' from 1997 to 2014"
)
Number of Female-at-birth Babies Named 'Allison' from 1997 to 2014
State
Frequency of Babies
AK
232
AL
1535
AR
1198
AZ
1880
CA
12413
CO
1594
CT
1099
DC
321
DE
294
FL
4455
GA
3257
HI
183
IA
1477
ID
451
IL
5110
IN
3067
KS
1283
KY
1905
LA
1209
MA
2218
MD
2229
ME
340
MI
4014
MN
2374
MO
2882
MS
817
MT
226
NC
3435
ND
285
NE
807
NH
412
NJ
3052
NM
399
NV
729
NY
5747
OH
5487
OK
1421
OR
1186
PA
4307
RI
306
SC
1228
SD
376
TN
2488
TX
10192
UT
1125
VA
3220
VT
135
WA
1956
WI
2367
WV
813
WY
142
Question 3.
Code
# Yearly total of babies named "Allison", summed over all states and both
# sexes. Year_fact is a factor copy of Year used for the discrete x-axis.
StateNames_Allison <- StateNames_A |>
  filter(Name == "Allison") |>
  group_by(Year) |>
  summarise(Count = sum(Count)) |>
  mutate(Year_fact = as.factor(Year))

# Bar chart of the yearly totals.
ggplot(StateNames_Allison) +
  geom_col(aes(x = Year_fact, y = Count), fill = "navy") +
  # The y-axis label is left blank; the subtitle says what is being counted.
  labs(
    x = "Year",
    y = "",
    subtitle = "Number of babies named \'Allison\'"
  ) +
  # Stagger the year labels over two rows so they do not overlap.
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  theme(plot.title.position = "plot")
Part 2
Question 4.
Code
# Simple linear regression of the yearly "Allison" count on calendar year.
Linear_Model <- lm(Count ~ Year, data = StateNames_Allison)
Question 5.
Code
# Scatterplot of the yearly counts with the fitted least-squares line on top.
ggplot(StateNames_Allison, aes(x = Year, y = Count)) +
  geom_point(color = "darkblue") +
  # se = FALSE hides the confidence band around the fitted line.
  geom_smooth(method = lm, se = FALSE, color = "tomato")
I do not see any pattern in the residuals, which is good! The two relatively large residuals catch my attention, but they are not so large that they would make me assume the model is incorrect.
What do you conclude from this model? Is my name not cool anymore?
While the model does point out a negative trend, I’m not sure that I would conclude that the name is “not cool” anymore! The name “Achilles,” I think, is very cool, but the website https://www.everything-birthday.com/name/m/Achilles suggests it has had many periods where its popularity “decreased” over time!
Part 3
Question 8.
Code
# Male-at-birth records for the three spellings "Allan", "Alan" and "Allen",
# one row per name / year / state.
Allan <- StateNames_A |>
  # %in% replaces the chained `==` / `|` comparisons. The former
  # mutate(Sex = Gender) step is removed: the column it created was dropped
  # by the select() right after it, so it never reached the result.
  filter(Name %in% c("Allan", "Alan", "Allen"), Gender == "M") |>
  select(Name, Year, State, Count)

# Interactive table; colnames are applied positionally to the columns of Allan.
DT::datatable(
  Allan,
  class = "cell-border stripe",
  colnames = c("Name", "Year", "State", "Number of Babies"),
  caption = "Number of Male-At-Birth Babies With a Variant of the Name \'Allan\'",
  filter = "top"
)
Question 9.
Code
# Counts of each spelling in California and Pennsylvania for the year 2000,
# one row per state and one column per spelling.
CA_PA <- Allan |>
  # A state/year with no record for a spelling gets 0 rather than NA.
  pivot_wider(names_from = Name, values_from = Count, values_fill = 0) |>
  filter(State %in% c("CA", "PA"), Year == 2000) |>
  # Name the columns explicitly instead of using the range `Allen:Allan`:
  # pivot_wider() orders new columns by first appearance in the data, so a
  # positional range could silently pick different columns and mismatch the
  # hard-coded labels below.
  select(State, Allen, Alan, Allan)

# col.names are applied positionally and match the select() order above.
knitr::kable(
  CA_PA,
  format = "html",
  col.names = c("State", "Allen", "Alan", "Allan"),
  align = "c",
  caption = "Number of Male-At-Birth Babies With a Variant of the Name \'Allan\' in 2000"
)
Number of Male-At-Birth Babies With a Variant of the Name 'Allan' in 2000
State
Allen
Alan
Allan
CA
176
579
131
PA
56
51
12
Question 10.
Code
# Convert the selected count columns to row-wise percentages.
#
# df:   a data frame holding the count columns.
# vars: character vector naming the columns to convert.
#
# Returns only the `vars` columns, each value expressed as a percentage of its
# row total across `vars`. A row whose total is 0 yields NaN.
num_to_Percent <- function(df, vars) {
  counts <- df[vars]
  # Return the value directly: ending on an assignment (`df <- ...`) would
  # return it invisibly and rebind the argument for no reason.
  counts / rowSums(counts) * 100
}

CA_PA_Percents <- num_to_Percent(
  df = CA_PA,
  vars = c("Allen", "Alan", "Allan")
)

# num_to_Percent() keeps only the count columns, so re-attach the state labels
# (appended as the last column; rows are in the same order as CA_PA).
CA_PA_percents <- CA_PA_Percents |>
  mutate(State = CA_PA$State)

# col.names are applied positionally: three percentage columns, then State.
knitr::kable(
  CA_PA_percents,
  format = "html",
  digits = 1,
  col.names = c(
    "Percent Named Allen",
    "Percent Named Alan",
    "Percent Named Allan",
    "State"
  ),
  align = "c",
  caption = "Spelling Proportions of Male-At-Birth Babies With a Variant of the Name \'Allan\' in 2000"
)
Spelling Proportions of Male-At-Birth Babies With a Variant of the Name 'Allan' in 2000